Xtensor tips
1. Installation
2. Areas/Programming/CPP/How to install Xtensor
2. Compile
> g++ xtensor_performance.cpp -o ./output/xtensor_performance -m64 -I ./libs -I"${MKLROOT}/include" -L${MKLROOT}/lib -Wl,-rpath=${MKLROOT}/lib -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl -O3
where ${MKLROOT}
is referred to the installation location. See eigen tips for details.
3. Performance test
3.1. Matrix multiplication
#define XTENSOR_USE_XSIMD
#define XTENSOR_USE_OPENMP
#include <ctime>
#include <chrono>
#include <xtensor/xarray.hpp>
#include <xtensor/xrandom.hpp>
#include <xtensor-blas/xlinalg.hpp>
std::vector<double> RunMatMultTest(int size = 100, int loops = 10)
{
xt::random::seed(time(NULL));
xt::xarray<double> A = xt::random::randn<double>({size, size});
std::vector<double> duration_list(loops);
for (int i = 0; i <= loops; ++i)
{
auto t0 = std::chrono::steady_clock::now();
xt::xarray<double> B = xt::linalg::dot(A, A);
auto t1 = std::chrono::steady_clock::now();
std::chrono::duration<double> duration = t1 - t0;
duration_list[i] = duration.count();
}
return duration_list;
}
int main()
{
int size = 1000;
int loops = 10;
std::cout << "please entr the size of the suqare matrix, default is 1000" << std::endl;
std::cin >> size;
std::cout << "please entr the loop times, default is 10" << std::endl;
std::cin >> loops;
std::vector<double> duration_list = RunEigDecompTest(size, loops);
for (int i=0; i<loops; ++i){
std::cout << duration_list[i] << " s\n";
}
std::cout <<"The mean time is: ";
std::cout <<std::accumulate(duration_list.begin(), duration_list.end(), 0.0) / loops << " s\n";
std::cin.get();
return 0;
}
It takes about
3.2. Eigenvalue Decomposition
std::vector<double> RunEigDecompTest(int size = 100, int loops = 10)
{
xt::random::seed(time(NULL));
xt::xarray<double> A = xt::random::randn<double>({size, size});
std::vector<double> duration_list(loops);
for (int i = 0; i <= loops; ++i)
{
auto t0 = std::chrono::steady_clock::now();
auto B = xt::linalg::eig(A);
auto t1 = std::chrono::steady_clock::now();
std::chrono::duration<double> duration = t1 - t0;
duration_list[i] = duration.count();
}
return duration_list;
It takes about
It takes about
It takes about
use xt::linalg::eigh ()
It takes about
It takes about
It takes about
3.3. QR factorization
std::vector<double> RunQRTest(int size = 100, int loops = 10)
{
xt::random::seed(time(NULL));
xt::xarray<double> A = xt::random::randn<double>({size, size});
std::vector<double> duration_list(loops);
for (int i = 0; i <= loops; ++i)
{
auto t0 = std::chrono::steady_clock::now();
auto B = xt::linalg::qr(A);
auto t1 = std::chrono::steady_clock::now();
std::chrono::duration<double> duration = t1 - t0;
duration_list[i] = duration.count();
}
return duration_list;
}
It takes about
It takes about
It takes about
While the numpy version takes
3.4. xt :: view
The codes are referenced from Roman Poya
template <typename T>
T finite_difference_range_implxtensor<T,2> &u, int num
{
xt::xtensor<T, 2> u_old = u;
view(u, range(1, num - 1), range(1, num - 1)) =
((view(u_old, range(0, num - 2), range(1, num - 1)) + view(u_old, range(2, num - 0), range(1, num - 1)) +
view(u_old, range(1, num - 1), range(0, num - 2)) + view(u_old, range(1, num - 1), range(2, num - 0))) *
4.0 +
view(u_old, range(0, num - 2), range(0, num - 2)) + view(u_old, range(0, num - 2), range(2, num - 0)) +
view(u_old, range(2, num - 0), range(0, num - 2)) + view(u_old, range(2, num - 0), range(2, num - 0))) /
20.0;
return xt::linalg::norm(u - u_old);
}
std::vector<double> RunFiniteDiffTest(int size = 100, int loops = 10)
{
xt::random::seed(time(NULL));
xt::xtensor<double, 2> A = xt::random::randn<double>({size, size});
std::vector<double> duration_list(loops);
for (int i = 0; i <= loops; ++i)
{
auto t0 = std::chrono::steady_clock::now();
auto result = finite_difference_range_impl<double> (A, size);
auto t1 = std::chrono::steady_clock::now();
std::chrono::duration<double> duration = t1 - t0;
duration_list[i] = duration.count();
}
return duration_list;
It takes about
It takes about
It takes about